# Select the first CRAN mirror non-interactively so any package
# installation during a headless knit does not prompt for a mirror.
chooseCRANmirror(graphics=FALSE, ind=1)
# Echo source code in every knitr chunk of the rendered document.
knitr::opts_chunk$set(echo = TRUE)

Load Libraries

library(tidyverse) #  data manipulation and graphs
## -- Attaching packages -------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.1
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ----------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr) #  string manipulation
library(lubridate) #  date manipulation
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library('wordcloud') #  wordcloud
## Warning: package 'wordcloud' was built under R version 3.5.3
## Loading required package: RColorBrewer
library(tidytext) # tidy implementation of NLP methods
## Warning: package 'tidytext' was built under R version 3.5.3
library(DT)       # table format display of data
## Warning: package 'DT' was built under R version 3.5.3
library(leaflet) # maps
## Warning: package 'leaflet' was built under R version 3.5.3
library(igraph) #  graphs
## Warning: package 'igraph' was built under R version 3.5.3
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(ggraph) #  graphs
## Warning: package 'ggraph' was built under R version 3.5.3
library(topicmodels) # for LDA topic modelling 
## Warning: package 'topicmodels' was built under R version 3.5.3
library(tm) # general text mining functions, making document term matrixes
## Warning: package 'tm' was built under R version 3.5.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.2
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(SnowballC) # for stemming
## Warning: package 'SnowballC' was built under R version 3.5.2
library(textcat)
## Warning: package 'textcat' was built under R version 3.5.3
library(dplyr)
# NOTE: the original called rm(list = ls()) here. Clearing the global
# environment mid-script is an anti-pattern, and at this point nothing
# had been assigned yet, so the call is dropped with no behavior change.

# Shared fill colours for the bar plots below.
fillColor <- "#FFA07A"   # primary (salmon)
fillColor2 <- "#F1C40F"  # secondary (yellow)

Read the business data

# Read the full Yelp business table; keep the raw copy so the filtered
# working frame can be rebuilt without re-reading the CSV.
business_raw <- read.csv(file="yelp_business.csv", header=TRUE, sep=",")
business <- business_raw

Business data

# Keep only Nevada and Arizona businesses for the rest of the analysis,
# then preview the filtered frame.
target <- c("NV", "AZ")
business <- filter(business, state %in% target)
datatable(head(business), style = "bootstrap", class = "table-condensed",
          options = list(dom = 'tp', scrollX = TRUE))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

#Most Popular Categories

The most popular categories of business are plotted in the bar plot

# Split the semicolon-separated category strings into one row per category.
categories <- str_split(business$categories, ";")
categories <- as.data.frame(unlist(categories))
colnames(categories) <- c("Name")

# Bar plot of the ten most frequent business categories.
categories %>%
  group_by(Name) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(Name = reorder(Name, Count)) %>%
  head(10) %>%
  ggplot(aes(x = Name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor2) +
  # paste0() has no `sep` argument: the original `sep=""` was silently
  # pasted as an extra empty-string argument, so it is removed here.
  geom_text(aes(x = Name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Name of Category', y = 'Count',
       title = 'Top 10 Categories of Business') +
  coord_flip() +
  theme_bw()

Top Ten Cities with the most Business parties mentioned in Yelp

We show the Top Ten Cities which have the most Business parties mentioned in Yelp

# Bar plot of the ten cities with the most businesses in the data.
business %>%
  group_by(city) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(City = reorder(city, Count)) %>%
  head(10) %>%
  ggplot(aes(x = City, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor) +
  # Labels are rounded to thousands. paste0() has no `sep` argument, so
  # the original `sep=""` (an extra empty string argument) is removed.
  geom_text(aes(x = City, y = 1, label = paste0("(", round(Count / 1e3), " K )")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'City', y = 'Count of Reviews',
       title = 'Top Ten Cities with the most Business parties in Yelp') +
  coord_flip() +
  theme_bw()

#Map of the business parties in Nevada

It seems from the map that most of the businesses are in the neighborhood of The Strip in Las Vegas.

From Wikipedia

The Las Vegas Strip is a stretch of South Las Vegas Boulevard in Clark County, Nevada that is known for its concentration of resort hotels and casinos. The Strip is approximately 4.2 miles (6.8 km) in length,[1] located immediately south of the Las Vegas city limits in the unincorporated towns of Paradise and Winchester.

# Nevada businesses plotted on a leaflet map, centred on the median
# coordinate (median is robust to far-flung outliers).
nevada_coords <- business %>% filter(state == "NV")

center_lon <- median(nevada_coords$longitude, na.rm = TRUE)
center_lat <- median(nevada_coords$latitude, na.rm = TRUE)

# Circle radius is sqrt(review_count) so heavily-reviewed venues do not
# swamp the map.
leaflet(nevada_coords) %>%
  addProviderTiles("Esri.NatGeoWorldMap") %>%
  addCircles(lng = ~longitude, lat = ~latitude, radius = ~sqrt(review_count)) %>%
  setView(lng = center_lon, lat = center_lat, zoom = 13)

Map of the business parties in Arizona

# Arizona businesses on the same style of leaflet map as Nevada above.
arizona_coords <- business %>% filter(state == "AZ")

center_lon <- median(arizona_coords$longitude, na.rm = TRUE)
center_lat <- median(arizona_coords$latitude, na.rm = TRUE)

leaflet(arizona_coords) %>%
  addProviderTiles("Esri.NatGeoWorldMap") %>%
  addCircles(lng = ~longitude, lat = ~latitude, radius = ~sqrt(review_count)) %>%
  setView(lng = center_lon, lat = center_lat, zoom = 13)

Read the review data

# Read the raw Yelp review data (~5.26M rows per the glimpse() output
# below) and keep an untouched copy before any filtering.
reviews_raw <- read.csv(file="yelp_review.csv", header=TRUE, sep=",")
reviews <- reviews_raw
datatable(head(reviews), style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))

Reviews data

A glimpse of the reviews data

glimpse(reviews)
## Observations: 5,261,668
## Variables: 9
## $ review_id   <fct> vkVSCC7xljjrAI4UGfnKEQ, n6QzIUObkYshz4dz2QRJTw, MV...
## $ user_id     <fct> bv2nCi5Qv5vroFiqKGopiw, bv2nCi5Qv5vroFiqKGopiw, bv...
## $ business_id <fct> AEx2SYEUJmTxVVB18LlCwA, VR6GpWIda3SfvPC-lg9H3w, CK...
## $ stars       <int> 5, 5, 5, 4, 4, 5, 4, 4, 3, 5, 4, 3, 1, 3, 5, 4, 1,...
## $ date        <fct> 2016-05-28, 2016-05-28, 2016-05-28, 2016-05-28, 20...
## $ text        <fct> "Super simple place but amazing nonetheless. It's ...
## $ useful      <int> 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 1, 5, 9, 2, 2, 0, 0,...
## $ funny       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
## $ cool        <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,...

Detecting the language of the reviews

Detecting the language of the first Ten reviews.

# n-gram based language identification (textcat) on the first ten review
# texts; all ten come back as English.
textcat(reviews[1:10,]$text)
##  [1] "english" "english" "english" "english" "english" "english" "english"
##  [8] "english" "english" "english"

Business with most Five Star Reviews from Users

The following plot shows the names of the businesses with the most Five Star reviews. Mon Ami Gabi and Bacchanal Buffet are the two most popular restaurants from the Yelp reviews with Five Star ratings. We will do a deep dive for these restaurants.

# Ten businesses with the most five-star reviews.
most5StarsReviews <- reviews %>%
  filter(stars == 5) %>%
  group_by(business_id) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(BusinessID = reorder(business_id, Count)) %>%
  head(10)

# Attach business attributes (name etc.). The key is now explicit so the
# join can never silently pick up other shared columns.
most5StarsReviews <- inner_join(most5StarsReviews, business, by = "business_id")

most5StarsReviews %>%
  mutate(name = reorder(name, Count)) %>%
  ggplot(aes(x = name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor) +
  # paste0() has no `sep` argument; the stray `sep=""` is removed.
  geom_text(aes(x = name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Name of the Business',
       y = 'Count',
       title = 'Name of the Business and Count') +
  coord_flip() +
  theme_bw()

“Mon Ami Gabi”

The location and category of the most liked business Mon Ami Gabi is shown below

# Location and category details for Mon Ami Gabi (business_id taken from
# the five-star ranking above).
mon_ami_gabi <- business %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw") %>%
  select(name, neighborhood, city, state, postal_code, categories)

datatable(head(mon_ami_gabi), style = "bootstrap", class = "table-condensed",
          options = list(dom = 'tp', scrollX = TRUE))

##Useful,funny,cool reviews

The following plot describes the number of Useful, Funny and Cool reviews. Most of the reviews are not useful, funny or cool.

mon_ami_gabi_reviews <- reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw")

# Helper: bar chart of the ten most common values of one vote column
# (useful / funny / cool). Extracted because the original repeated the
# same 18-line pipeline three times, differing only in column, fill
# colour, and axis label. The `.data` pronoun looks the column up by
# its string name inside the dplyr pipeline.
plot_vote_counts <- function(df, vote_col, bar_fill, axis_label) {
  df %>%
    group_by(Value = .data[[vote_col]]) %>%
    summarise(Count = n()) %>%
    arrange(desc(Count)) %>%
    ungroup() %>%
    mutate(Value = reorder(Value, Count)) %>%
    head(10) %>%
    ggplot(aes(x = Value, y = Count)) +
    geom_bar(stat = 'identity', colour = "white", fill = bar_fill) +
    # paste0() has no `sep` argument; the stray `sep=""` is removed.
    geom_text(aes(x = Value, y = 1, label = paste0("(", Count, ")")),
              hjust = 0, vjust = .5, size = 4, colour = 'black',
              fontface = 'bold') +
    labs(x = axis_label, y = 'Count',
         title = paste(axis_label, 'and Count')) +
    coord_flip() +
    theme_bw()
}

plot_vote_counts(mon_ami_gabi_reviews, "useful", fillColor,  "Useful Reviews")
plot_vote_counts(mon_ami_gabi_reviews, "funny",  fillColor2, "Funny Reviews")
plot_vote_counts(mon_ami_gabi_reviews, "cool",   fillColor,  "Cool Reviews")

Word Cloud of Mon Ami Gabi

A word cloud is a graphical representation of frequently used words in the text. The size of each word in this picture is an indication of the frequency of occurrence of that word in the entire text. The words steak, service, vegas, french, patio, bellagio, delicious and nice have been used very frequently in the reviews. Note that the one prominent word which is not food related is Service; we will see in the subsequent sections on sentiment analysis and topic modelling why this keyword is important.

#createWordCloud = function(train)
#{
 # train %>%
#  unnest_tokens(word, text) %>%
#  filter(!word %in% stop_words$word) %>%
#  count(word,sort = TRUE) %>%
#  ungroup()  %>%
#  head(30) %>%
  
#  with(wordcloud(word, n, max.words = 30,colors=brewer.pal(8, "Dark2")))
#}
#createWordCloud(reviews %>%
#  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw"))
library(gdata) 
## gdata: Unable to locate valid perl interpreter
## gdata: 
## gdata: read.xls() will be unable to read Excel XLS and XLSX files
## gdata: unless the 'perl=' argument is used to specify the location
## gdata: of a valid perl intrpreter.
## gdata: 
## gdata: (To avoid display of this message in the future, please
## gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLX' (Excel 97-2004) files.
## 
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLSX' (Excel 2007+) files.
## 
## gdata: Run the function 'installXLSXsupport()'
## gdata: to automatically download and install the perl
## gdata: libaries needed to support Excel XLS and XLSX formats.
## 
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
## 
##     combine, first, last
## The following object is masked from 'package:purrr':
## 
##     keep
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
library(ggplot2)
library(openintro) 
## Please visit openintro.org for free statistics materials
## 
## Attaching package: 'openintro'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
## The following objects are masked from 'package:datasets':
## 
##     cars, trees
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(zipcode)
#1. Read the data
# The sheet's first row is a citation line, so readxl auto-names the
# remaining columns ...2/...3/...4 (see output below); the real headers
# sit in the first data row and are repaired in the cleanup step.
df <- read_xlsx("MedianZIP.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
head(df)
## # A tibble: 6 x 4
##   `Data from: http://www.psc.isr.umich.edu/d~ ...2        ...3        ...4 
##   <chr>                                       <chr>       <chr>       <chr>
## 1 Zip                                         Median      Mean        Pop  
## 2 1001                                        56662.5734~ 66687.7508~ 16445
## 3 1002                                        49853.4176~ 75062.6343~ 28069
## 4 1003                                        28462       35121       8491 
## 5 1005                                        75423       82442       4798 
## 6 1007                                        79076.3540~ 85801.9750~ 12962
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    32635 obs. of  4 variables:
##  $ Data from: http://www.psc.isr.umich.edu/dis/census/Features/tract2zip/: chr  "Zip" "1001" "1002" "1003" ...
##  $ ...2                                                                  : chr  "Median" "56662.573499999999" "49853.417699999998" "28462" ...
##  $ ...3                                                                  : chr  "Mean" "66687.750899999999" "75062.634300000005" "35121" ...
##  $ ...4                                                                  : chr  "Pop" "16445" "28069" "8491" ...
# Open the data viewer to sanity-check the import (interactive sessions).
View(df)
#2. Clean up the dataframe
#2a. Remove unneeded information: the first row holds the real headers
df <- df[-1, ]
#2b. Update column names (zip, median, mean, population)
colnames(df) <- c("zip", "median", "mean", "population")
# Strip thousands-separator commas and coerce each measure column to
# numeric in one loop instead of three copy-pasted statements.
# (Non-numeric residue coerces to NA, as the original run warned.)
for (col in c("median", "mean", "population")) {
  df[[col]] <- as.numeric(gsub(",", "", df[[col]]))
}

#3. Load the 'zipcode' package
data(zipcode)
# clean.zipcodes() normalises zips (padding, formatting) so the keys
# match the 'zipcode' lookup table used in the merge below.
df$zip <- clean.zipcodes(df$zip) #reformat the zip codes

#4. Merge the zip code information from two data frames
# merge() on "zip" attaches city/state/latitude/longitude to each row.
dfNew <- merge(df, zipcode, by ="zip")
head(dfNew)
##     zip   median     mean population        city state latitude longitude
## 1 01001 56662.57 66687.75      16445      Agawam    MA 42.07061 -72.62029
## 2 01002 49853.42 75062.63      28069     Amherst    MA 42.37765 -72.50323
## 3 01003 28462.00 35121.00       8491     Amherst    MA 42.36956 -72.63599
## 4 01005 75423.00 82442.00       4798       Barre    MA 42.41209 -72.10443
## 5 01007 79076.35 85801.98      12962 Belchertown    MA 42.27842 -72.41100
## 6 01008 63980.00 78391.00       1244   Blandford    MA 42.17431 -72.94828
#5. Remove Hawaii, Alaska and DC so the map covers the contiguous US;
# one %in% test replaces the original three separate subset statements.
dfNew <- dfNew[!(dfNew$state %in% c("HI", "AK", "DC")), ]

#Step   2: Show the income  &   population  per state
#  1) Create    a   simpler dataframe,  with    just    the average median  income  and the the population  for each    state.
#  2) Add   the state   abbreviations   and the state   names   as  new columns (make   sure    the state   names   are all lower   case)
#  3) Show  the U.S.    map,    representing    the color   with    the average median  income  of  that state
#  4) Create    a   second  map with    color   representing    the population  of  the state
#Create a simpler dataframe with median income and population per state
# tapply collapses the zip-level rows to one value per state.
income <- tapply(dfNew$median, dfNew$state, mean)
medianIncome <- data.frame(state = rownames(income), income = income)

pop <- tapply(dfNew$population, dfNew$state, sum)
statePop <- data.frame(state = rownames(pop), pop = pop)

# One row per state: average median income + total population.
dfSimple <- merge(medianIncome, statePop, by = "state")
head(dfSimple)
##   state   income      pop
## 1    AL 40549.90  4770242
## 2    AR 36960.95  2936699
## 3    AZ 48132.06  6360679
## 4    CA 62628.71 36927999
## 5    CO 56303.02  4979279
## 6    CT 78520.16  3548308
# previous steps can be done using sql instead and scaling the income at the state level
# First query: per-state average of zip-level median income plus total population.
dfSimple<- sqldf("select state, avg(median) as income, sum(population) as pop from dfNew group by state")
# Second query divides income by pop, so `income` becomes a population-
# scaled figure rather than the average median income computed above.
# NOTE(review): confirm this rescaling is intentional — the head() output
# below shows tiny fractional incomes.
dfSimple<- sqldf("select state, (income/pop) as income, pop from dfSimple")

head(dfSimple)
##   state      income      pop
## 1    AL 0.008500597  4770242
## 2    AR 0.012585883  2936699
## 3    AZ 0.007567126  6360679
## 4    CA 0.001695968 36927999
## 5    CO 0.011307464  4979279
## 6    CT 0.022128902  3548308
#Add state abbreviations and state names (lower case)
#use match(dfSimple$state,state.abb)
# Look each abbreviation up in the built-in state.abb vector, then take
# the matching full name and lower-case it in a single step.
abbr_idx <- match(dfSimple$state, state.abb)
dfSimple$stateName <- tolower(state.name[abbr_idx])

#Show us map, representing color with average median income
# map_data() pulls state polygons; it attaches the 'maps' package as a
# side effect (masking purrr::map, per the output below).
us <- map_data("state")
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
# Shared theme fragment that strips axis text, ticks, lines, titles, the
# panel grid and the panel border from the map plots below.
ditch_the_axes <- theme(
  axis.title  = element_blank(),
  axis.text   = element_blank(),
  axis.ticks  = element_blank(),
  axis.line   = element_blank(),
  panel.grid  = element_blank(),
  panel.border = element_blank()
)

Show the U.S. map, representing the color with the average median income of that state

# Choropleth of average median income by state. The fill now refers to
# the `income` column through aes() — using dfSimple$income inside aes()
# detaches the mapping from the plot's own data and is an anti-pattern.
MapIncome <- ggplot(dfSimple, aes(map_id = stateName)) +
  geom_map(map = us, aes(fill = income)) +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("average median income by state") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = "Income")) +
  ditch_the_axes
MapIncome

# Create a second map with colour representing the population of the state.
# As with MapIncome, the fill refers to the `pop` column via aes() rather
# than the detached vector dfSimple$pop.
MapPopulation <- ggplot(dfSimple, aes(map_id = stateName)) +
  geom_map(map = us, aes(fill = pop)) +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("Population by state") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = "Population")) +
  ditch_the_axes
MapPopulation

Step 3: Show the income per zip code

1) Have draw each zip code on the map, where the color of the ‘dot’ is based on the median income. To make the map look appealing,

have the background of the map be black.

Draw each zip code on map where color of dot is based on median income

# Lower-cased full state names for dfNew (needed by geom_map's map_id).
dfNew$stateName <- tolower(state.name[match(dfNew$state, state.abb)])

# One dot per zip code, coloured by median income, over a black US map.
# The point aesthetics reference columns via aes() and inherit the plot
# data, instead of the dfNew$... anti-pattern of the original.
MapZip <- ggplot(dfNew, aes(map_id = stateName)) +
  geom_map(map = us, fill = "black", color = "white") +
  expand_limits(x = us$long, y = us$lat) +
  geom_point(aes(x = longitude, y = latitude, color = median)) +
  coord_map() +
  ggtitle("income per zip code") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ditch_the_axes
MapZip

head(dfNew)
##     zip   median     mean population        city state latitude longitude
## 1 01001 56662.57 66687.75      16445      Agawam    MA 42.07061 -72.62029
## 2 01002 49853.42 75062.63      28069     Amherst    MA 42.37765 -72.50323
## 3 01003 28462.00 35121.00       8491     Amherst    MA 42.36956 -72.63599
## 4 01005 75423.00 82442.00       4798       Barre    MA 42.41209 -72.10443
## 5 01007 79076.35 85801.98      12962 Belchertown    MA 42.27842 -72.41100
## 6 01008 63980.00 78391.00       1244   Blandford    MA 42.17431 -72.94828
##       stateName
## 1 massachusetts
## 2 massachusetts
## 3 massachusetts
## 4 massachusetts
## 5 massachusetts
## 6 massachusetts

Step 4: Show Zip Code Density

1) Now generate a different map, one where we can easily see where there are lots of zip codes, and where there are few (using the ‘stat_density2d’ function).

Generate a map showing density of zip codes

# 2-D density contours of zip-code locations over the black US map.
# Coordinates are mapped via aes() column names instead of the
# dfNew$longitude / dfNew$latitude anti-pattern.
MapDensity <- ggplot(dfNew, aes(map_id = stateName)) +
  geom_map(map = us, fill = "black", color = "white") +
  expand_limits(x = us$long, y = us$lat) +
  stat_density_2d(aes(x = longitude, y = latitude)) +
  coord_map() +
  ggtitle("zip code density") +
  theme(plot.title = element_text(hjust = 0.5))
MapDensity

#Step 5: Zoom in to the region around Nevada # 1) Repeat steps 3 & 4, but have the image / map be of the southwest U.S. (centered around Las Vegas).

# Geocode the zoom centre (Data Science Toolkit backend) and build a
# +/- 10 degree window around Las Vegas.
zoomGeo <- geocode("Las Vegas, NV", source = "dsk")
zoomAmount <- 10

centerx <- zoomGeo$lon
centery <- zoomGeo$lat

xlimit <- centerx + c(-zoomAmount, zoomAmount)
ylimit <- centery + c(-zoomAmount, zoomAmount)

#Show income per zip code around Vegas
# Re-adding coord_map() after xlim/ylim replaces the existing coordinate
# system (ggplot reports this as a message).
MapZipZoom <- MapZip + xlim(xlimit) + ylim(ylimit) + coord_map() +
  geom_point(aes(x = centerx, y = centery), color = "darkred", size = 3) +
  ggtitle("Income by Zip around Vegas ") +
  theme(plot.title = element_text(hjust = 0.5))
MapZipZoom
## Warning: Removed 28232 rows containing missing values (geom_point).

# Same zoom treatment as Las Vegas above, centred on Phoenix.
zoomGeo <- geocode("Phoenix, AZ", source = "dsk")
zoomAmount <- 10

centerx <- zoomGeo$lon
centery <- zoomGeo$lat

xlimit <- centerx + c(-zoomAmount, zoomAmount)
ylimit <- centery + c(-zoomAmount, zoomAmount)

#Show income per zip code around Phoenix
MapZipZoom <- MapZip + xlim(xlimit) + ylim(ylimit) + coord_map() +
  geom_point(aes(x = centerx, y = centery), color = "darkred", size = 3) +
  ggtitle("Income by Zip around Phoenix ") +
  theme(plot.title = element_text(hjust = 0.5))
MapZipZoom
## Warning: Removed 28735 rows containing missing values (geom_point).

#devtools::install_github("UrbanInstitute/urbnmapr")

house ownership rate

#devtools::install_github("UI-Research/urbnthemes")
library(urbnmapr)
library(urbnthemes)
## Setting Windows options...
## 
## Attaching package: 'urbnthemes'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_bar, geom_col, geom_jitter, geom_line, geom_path,
##     geom_point, geom_step, geom_text, scale_color_discrete,
##     scale_color_gradientn, scale_colour_discrete,
##     scale_colour_gradientn, scale_colour_ordinal,
##     scale_fill_discrete, scale_fill_gradientn, scale_fill_ordinal
# Join the attribute data ONTO the sf map object (sf on the left) so the
# result keeps its sf class. The original joined the plain data frame
# first, which drops the sf class and made geom_sf fail inside st_bbox
# ("no applicable method for 'st_bbox' applied to ... class \"list\"").
spatial_data <- left_join(get_urbn_map(map = "states", sf = TRUE),
                          statedata,
                          by = "state_name")

ggplot() +
  geom_sf(spatial_data,
          mapping = aes(fill = horate),
          color = "#ffffff", size = 0.25) +
  labs(fill = "Homeownership rate")
## Warning: Computation failed in `stat_sf()`:
## no applicable method for 'st_bbox' applied to an object of class "list"

set_urbn_defaults(style = "map")
states_sf <- get_urbn_map(map = "states", sf = TRUE)

# Keep the sf object on the left of the join so the sf class survives
# into ggplot/geom_sf (same defect as the failed attempt above: joining
# the plain data frame first strips the sf class).
states_sf %>%
  left_join(statedata, by = "state_name") %>%
  ggplot() +
  geom_sf(mapping = aes(fill = horate),
          color = "#ffffff", size = 0.25) +
  scale_fill_gradientn(labels = scales::percent) +
  labs(fill = "Homeownership rate") +
  coord_sf(datum = NA)